from Spider import BaiduSpider


def get_lx_url_site_list():
    """Read the list of search entries to crawl from the text file.

    Reads ``lx_url_site(2).txt`` from the current working directory,
    strips all double-quote characters, splits on newlines, and drops
    every blank line.

    Returns:
        list[str]: non-empty entry strings, one per input line.

    Raises:
        FileNotFoundError: if the input file does not exist.
    """
    # NOTE: original path was '.\lx_url_site(2).txt' — the '\l' escape is
    # deprecated (SyntaxWarning in 3.12+) and the '.\' prefix is redundant;
    # a bare relative name resolves to the same file in the cwd.
    with open('lx_url_site(2).txt', 'r', encoding='utf8') as f:
        the_read = f.read().replace('"', '')  # .replace(' inurl:wcycn','')

    # Filter out ALL blank lines. The original used list.remove(''), which
    # removes only the first empty entry and raises ValueError when the
    # file lacks a trailing newline (no '' present at all).
    return [line for line in the_read.split('\n') if line]

#
# if __name__ == '__main__':
#     spider = BaiduSpider.Baidu.get_baidu_html
#     for word in get_lx_url_site_list():
#         page = 1
#         while 1:
#
#             try:
#                 next_page = spider(word=word.replace('"', ''), page=page)
#             except Exception as e:
#                 with open('error.txt', 'a', encoding='utf-8')as f:
#                     f.write(word + '---' + str(page))
#             page += 1
#             if next_page != '有下一页':
#                 break

# if __name__ == '__main__':
#     a=get_lx_url_site_list()
#     print(a)
